# -*- coding:utf8 -*-
import importlib
import re, sys, json, datetime, random, time
from scrapy.selector import Selector
from scrapy.exceptions import CloseSpider
# Import Spider from its modern location, falling back to the pre-1.0
# Scrapy name (BaseSpider).  Catch only ImportError — a bare `except:`
# would also swallow SystemExit/KeyboardInterrupt and hide real bugs.
try:
    from scrapy.spiders import Spider
except ImportError:
    from scrapy.spider import BaseSpider as Spider

from scrapy.http import Request, FormRequest
from scrapy.utils.response import get_base_url
from scrapy.utils.url import urljoin_rfc

from gaokaopai.items import *
from gaokaopai.dao import *
from gaokaopai.util import *

# NOTE(review): leftover from the Python 2 `reload(sys); sys.setdefaultencoding(...)`
# encoding hack (see the commented-out line below). On Python 3 this reload has no
# useful effect — consider deleting both lines once confirmed nothing relies on it.
importlib.reload(sys)
# sys.setdefaultencoding("utf-8")

class DazhuanSpider(Spider):
    """Crawl gaokaopai.com's junior-college (dazhuan) listing pages.

    For every school entry on each listing page it extracts the school id
    from the detail-page URL and records it via ``updateDaUniversity``;
    it then follows the "next page" link until pagination is exhausted.
    This spider yields no items — its effect is entirely through the DAO call.
    """
    name = 'gaokaopai_dazhuan'
    # Scrapy's OffsiteMiddleware reads `allowed_domains`; the original `allow`
    # attribute is not recognized by Scrapy. Keep `allow` for backward
    # compatibility in case project code reads it.
    allowed_domains = ['gaokaopai.com']
    allow = ['gaokaopai.com']

    # NOTE: the previous no-op __init__ override (it only called super())
    # was removed as dead code.

    def start_requests(self):
        """Seed the crawl with the first listing page."""
        yield Request("http://www.gaokaopai.com/daxue-0-0-0-2-0-0-0.html", callback=self.parse_list, dont_filter=True)

    def parse_list(self, response):
        """Parse one listing page: record each school id, then follow pagination."""
        base_url = get_base_url(response)

        for item_dom in response.xpath(u"//div[contains(@class, 'schoolList')]/ul/li"):
            # Detail-page URL of this school entry.
            url = ''.join(item_dom.xpath(u".//h3/a/@href").extract()).strip()

            # School id, decoded from the detail URL by project helpers.
            fid = getNum(getCode(url, 2))

            updateDaUniversity(fid, 2)

        # Pagination: follow the "next page" link when one exists.
        next_page = ''.join(response.xpath(u"//div[contains(@class, 'pager')]/a[contains(text(), '下一页')]/@href").extract())
        if next_page:
            yield Request(urljoin_rfc(base_url, next_page), callback=self.parse_list, dont_filter=True)